library(here)here() starts at /Users/benarnold/quantAb-annecy
here()[1] "/Users/benarnold/quantAb-annecy"
library(tidyverse)[30m── [1mAttaching packages[22m ────────────────────────────────────────────────── tidyverse 1.2.1 ──[39m
[30m[32m✔[30m [34mggplot2[30m 2.2.1 [32m✔[30m [34mpurrr [30m 0.2.4
[32m✔[30m [34mtibble [30m 1.4.2 [32m✔[30m [34mdplyr [30m 0.7.4
[32m✔[30m [34mtidyr [30m 0.8.0 [32m✔[30m [34mstringr[30m 1.3.1
[32m✔[30m [34mreadr [30m 1.1.1 [32m✔[30m [34mforcats[30m 0.3.0[39m
[30m── [1mConflicts[22m ───────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31m✖[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()[39m
library(mgcv)Loading required package: nlme
Attaching package: ‘nlme’
The following object is masked from ‘package:dplyr’:
collapse
This is mgcv 1.8-23. For overview type 'help("mgcv-package")'.
library(gridExtra)
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine
# set up for parallel computing
# configure for a laptop (use only 3 cores)
library(foreach)
Attaching package: ‘foreach’
The following objects are masked from ‘package:purrr’:
accumulate, when
library(doParallel)Loading required package: iterators
Loading required package: parallel
registerDoParallel(cores=3)
# grab some colors for plotting
ggplotcols <- scales::hue_pal()(3)
# bright color blind palette: https://personal.sron.nl/~pault/
cblack <- "#000004FF"
cblue <- "#3366AA"
cteal <- "#11AA99"
cgreen <- "#66AA55"
cchartr <- "#CCCC55"
cmagent <- "#992288"
cred <- "#EE3333"
corange <- "#EEA722"
cyellow <- "#FFEE33"
cgrey <- "#777777"
pcols <- c(ggplotcols,corange)Note: at this time these data are not yet publicly available. They are from this study: Won KY, Kanyi HM, Mwende FM, Wiegand RE, Brook Goodhew E, Priest JW, et al. Multiplex Serologic Assessment of Schistosomiasis in Western Kenya: Antibody Responses in Preschool Age Children as a Measure of Reduced Transmission. Am J Trop Med Hyg. 2017; 16–0665. https://www.ncbi.nlm.nih.gov/pubmed/28719280
d <- readRDS(here("data","mbita_psac.rds"))
# create age strata
d <- d %>%
select(year,community=vid,
pid,age=agey,
asc_epg,sm_epg,
ashb,sea,msp1,vsp5) %>%
mutate(agecat = cut(age,breaks=c(0,1,2,3,4,6),labels=c("<1 year","1 year","2 years","3 years","4 years")))
# reshape KK data to long format
# the eggs per gram (EPG) measures are grouped by "antigen"
# (a wrong label) just to make them easy to merge back to the antibody data
dlkk <- d %>%
select(year,community,pid,asc_epg,sm_epg,) %>%
gather(key=antigen,value=epg,-year,-community,-pid) %>%
mutate(antigen = factor(antigen,levels=c("asc_epg","sm_epg"),labels=c("ashb","sea"))) %>%
group_by(year,community,antigen) %>%
arrange(year,community,antigen)
# reshape antibody data to long format
dl <- d %>%
select(year,community,pid,age,agecat,ashb,sea,msp1,vsp5) %>%
gather(key=antigen,value=mfi,-year,-community,-pid,-age,-agecat) %>%
mutate(antigen = factor(antigen,levels=c("ashb","sea","msp1","vsp5"))) %>%
group_by(year,community,antigen) %>%
arrange(year,community,antigen,age)
dl <- dl %>%
mutate(logmfi = ifelse(mfi>0,log10(mfi),log10(1)))
# add ROC-based cutoffs
dl$roccut <- NA
dl$roccut[dl$antigen %in% "ashb"] <- log10(418)
dl$roccut[dl$antigen %in% "sea"] <- log10(965)
dl$roccut[dl$antigen %in% "msp1"] <- log10(170)
dl$roccut[dl$antigen %in% "vsp5"] <- log10(281)
# define seroprevalence based on ROC cutoffs
dl <- dl %>%
mutate(seropos = ifelse(logmfi>roccut,1,0))
# create formatted antigen labels
dl <- dl %>%
mutate(antigenf=factor(antigen,levels=c("ashb","sea","msp1","vsp5"),
labels=c("Ascaris spp. AsHb",
"S. mansoni SEA",
"P. falciparum MSP-1",
"Giardia sp. VSP-5")))
# merge in the KK measurements for Schisto and Ascaris
dl <- left_join(dl,dlkk,by=c("year","community","pid","antigen"))table(dl$agecat,dl$antigenf)
Ascaris spp. AsHb S. mansoni SEA P. falciparum MSP-1 Giardia sp. VSP-5
<1 year 51 51 51 51
1 year 574 574 570 570
2 years 782 782 780 780
3 years 900 900 890 890
4 years 1378 1378 1372 1372
pdist <- ggplot(data=dl,aes(x=logmfi,fill=antigen)) +
facet_grid(agecat~antigenf) +
geom_density(alpha=0.5,color=NA) +
geom_vline(aes(xintercept = roccut),lty=1) +
scale_fill_manual(values=pcols)+
# geom_vline(aes(xintercept = mixcut),lty=2) +
labs(x="log10 luminex response (MFI-bg)") +
theme_minimal() +
theme(legend.position = "none",
strip.text.y=element_text(angle=0))
pdist# limit to just SEA and Giardia for presentation
dlp <- dl %>% filter(antigen %in% c("ashb","sea","vsp5") ) %>%
mutate(antigenf=factor(antigenf,levels=c("S. mansoni SEA","Giardia sp. VSP-5","Ascaris spp. AsHb")))
pdist2 <- ggplot(data=dlp,aes(x=logmfi,fill=antigen)) +
facet_grid(agecat~antigenf) +
geom_density(alpha=0.75,color=NA) +
geom_vline(aes(xintercept = roccut),lty=1) +
scale_fill_manual(values=pcols[c(1,2,4)])+
# geom_vline(aes(xintercept = mixcut),lty=2) +
labs(x="log10 luminex response (MFI-bg)") +
theme_minimal() +
theme(legend.position = "none",
strip.text.y=element_text(angle=0,size=14),
strip.text.x=element_text(size=14))
pdist2
# save png file for presentation
ggsave(filename=here("output","mbita-ab-dists-ashb-sea-vsp.png"),plot=pdist2,device="png",width=8,height=7)pdist3 <- ggplot(data=dl,aes(x=logmfi,fill=antigenf)) +
facet_grid(.~antigenf) +
geom_density(alpha=0.5,color=NA) +
geom_vline(aes(xintercept = roccut),lty=1) +
scale_fill_manual(values=pcols)+
# geom_vline(aes(xintercept = mixcut),lty=2) +
labs(x="log10 luminex response (MFI-bg)") +
theme_minimal() +
theme(legend.position = "none",
strip.text.y=element_text(angle=0))
pdist3
# save png file for presentation
ggsave(filename=here("output","mbita-ab-dists.png"),plot=pdist3,device="png",width=8,height=2)#----------------------------------
# simulataneous CIs for GAMs
# estimated by resampling the
# Baysian posterior estimates of
# the variance-covariance matrix
# assuming that it is multivariate normal
# the function below also estimates
# the unconditional variance-covariance
# matrix, Vb=vcov(x,unconditional=TRUE),
# which allows for undertainty in the actual
# estimated mean as well
# (Marra & Wood 2012 Scandinavian Journal of Statistics,
# Vol. 39: 53–74, 2012, doi: 10.1111/j.1467-9469.2011.00760.x )
# simultaneous CIs provide much better coverage than pointwise CIs
# see: http://www.fromthebottomoftheheap.net/2016/12/15/simultaneous-interval-revisited/
#----------------------------------
gamCI <- function(m,newdata,nreps=10000) {
require(mgcv)
require(dplyr)
Vb <- vcov(m,unconditional = TRUE)
pred <- predict(m, newdata, se.fit = TRUE)
fit <- pred$fit
se.fit <- pred$se.fit
BUdiff <- MASS::mvrnorm(n=nreps, mu = rep(0, nrow(Vb)), Sigma = Vb)
Cg <- predict(m, newdata, type = "lpmatrix")
simDev <- Cg %*% t(BUdiff)
absDev <- abs(sweep(simDev, 1, se.fit, FUN = "/"))
masd <- apply(absDev, 2L, max)
crit <- quantile(masd, prob = 0.95, type = 8)
pred <- data.frame(newdata,fit=pred$fit,se.fit=pred$se.fit)
pred <- mutate(pred,
uprP = fit + (2 * se.fit),
lwrP = fit - (2 * se.fit),
uprS = fit + (crit * se.fit),
lwrS = fit - (crit * se.fit)
)
return(pred)
}#----------------------------------
# prep data for spline fits
#----------------------------------
dl <- dl %>%
ungroup() %>%
mutate(community=factor(community),
dummy=1)
#----------------------------------
# choose the smoothing parameter
# for the splines, k,
# based on cross-validated MSE
# pick smallest k where CV MSE is
# close to its minimum
#----------------------------------
# library(SuperLearner)
# library(mgcv)
# source(here("R","SL.mgcv.R")) # mgcv wrapper for SuperLearner
# # k=4
# set.seed(123)
# sld <- filter(dl,antigen=="msp1")
# cv_msp1 <- SuperLearner(Y=sld$logmfi,X=select(sld,age), SL.library = paste("SL.mgcv.k",4:10,sep=""))
# cv_msp1
# # k=4
# set.seed(123)
# sld <- filter(dl,antigen=="vsp5")
# cv_vsp5 <- SuperLearner(Y=sld$logmfi,X=select(sld,age), SL.library = paste("SL.mgcv.k",4:10,sep=""))
# cv_vsp5
# # k=4
# set.seed(123)
# sld <- filter(dl,antigen=="ashb")
# cv_ashb <- SuperLearner(Y=sld$logmfi,X=select(sld,age), SL.library = paste("SL.mgcv.k",4:10,sep=""))
# cv_ashb
# # k=4
# set.seed(123)
# sld <- filter(dl,antigen=="sea")
# cv_sea <- SuperLearner(Y=sld$logmfi,X=select(sld,age), SL.library = paste("SL.mgcv.k",4:10,sep=""))
# cv_sea#----------------------------------
# fit GAM with a spline for age
# include a random effect for cluster
# estimate simultaneous CIs around the curve
# for the prediction data, set the dummy to 0 to
# zero out all of the random effects
# see posts on Stack Exchange for explanation:
# https://stats.stackexchange.com/questions/131106/predicting-with-random-effects-in-mgcv-gam/131116#131116
# https://stats.stackexchange.com/questions/189384/predicting-mean-smooth-in-gam-with-smooth-by-random-factor-interaction
#----------------------------------
# Pf Malaria MSP1
fit_msp1 <- mgcv::gam(logmfi~s(age,bs="cr",k=4) + s(community,bs="re",by=dummy),
data=filter(dl,antigen=="msp1"))
newd <- dl %>% filter(antigen=="msp1") %>% mutate(dummy=0)
fit_msp1ci <- gamCI(m=fit_msp1,newdata=newd,nreps=10000)
# Giardia VSP5
fit_vsp5 <- mgcv::gam(logmfi~s(age,bs="cr",k=4) + s(community,bs="re",by=dummy),
data=filter(dl,antigen=="vsp5"))
newd <- dl %>% filter(antigen=="vsp5") %>% mutate(dummy=0)
fit_vsp5ci <- gamCI(m=fit_vsp5,newdata=newd,nreps=10000)
# Ascaris AsHb
fit_ashb <- mgcv::gam(logmfi~s(age,bs="cr",k=4) + s(community,bs="re",by=dummy),
data=filter(dl,antigen=="ashb"))
newd <- dl %>% filter(antigen=="ashb") %>% mutate(dummy=0)
fit_ashbci <- gamCI(m=fit_ashb,newdata=newd,nreps=10000)
# Schisto SEA
fit_sea <- mgcv::gam(logmfi~s(age,bs="cr",k=4) + s(community,bs="re",by=dummy),
data=filter(dl,antigen=="sea"))
newd <- dl %>% filter(antigen=="sea") %>% mutate(dummy=0)
fit_seaci <- gamCI(m=fit_sea,newdata=newd,nreps=10000)
fit_mfi <- bind_rows(fit_msp1ci,fit_vsp5ci,fit_ashbci,fit_seaci)pagemfi <- ggplot(data=fit_mfi,aes(x=age,y=fit,color=antigenf)) +
facet_grid(~antigenf) +
geom_point(aes(x=age,y=logmfi),alpha=0.1,size=0.2)+
geom_ribbon(aes(ymin=lwrS,ymax=uprS),alpha=0.2,color=NA,fill="black") +
geom_line(lwd=0.5,alpha=0.5,color="black") +
geom_smooth(method="loess",se=FALSE,lwd=0.5,color="black") +
scale_color_manual(values=pcols)+
scale_y_continuous(breaks=seq(0,4,by=1))+
labs(x="age, years",y="log10 Luminex Response (MFI-bg)") +
theme_minimal() +
theme(legend.position="none")
pagemfi# Ascaris AsHb
fitp_ashb <- mgcv::gam(seropos~s(age,bs="cr",k=11) + s(community,bs="re",by=dummy), family="binomial",
data=filter(dl,antigen=="ashb"))
newd <- dl %>% filter(antigen=="ashb") %>% mutate(dummy=0)
fitp_ashbci <- gamCI(m=fitp_ashb,newdata=newd,nreps=10000)
# Schisto SEA
fitp_sea <- mgcv::gam(seropos~s(age,bs="cr",k=9) + s(community,bs="re",by=dummy), family="binomial",
data=filter(dl,antigen=="sea"))
newd <- dl %>% filter(antigen=="sea") %>% mutate(dummy=0)
fitp_seaci <- gamCI(m=fitp_sea,newdata=newd,nreps=10000)
# Pf Malaria MSP1
fitp_msp1 <- mgcv::gam(seropos~s(age,bs="cr",k=4) + s(community,bs="re",by=dummy),
data=filter(dl,antigen=="msp1"))
newd <- dl %>% filter(antigen=="msp1") %>% mutate(dummy=0)
fitp_msp1ci <- gamCI(m=fitp_msp1,newdata=newd,nreps=10000)
# Giardia VSP5
fitp_vsp5 <- mgcv::gam(seropos~s(age,bs="cr",k=4) + s(community,bs="re",by=dummy),
data=filter(dl,antigen=="vsp5"))
newd <- dl %>% filter(antigen=="vsp5") %>% mutate(dummy=0)
fitp_vsp5ci <- gamCI(m=fitp_vsp5,newdata=newd,nreps=10000)
fit_seroprev <- bind_rows(fitp_ashbci,fitp_seaci,fitp_msp1ci,fitp_vsp5ci)
# convert linear predictor to prevalance
expitfn <- function(x) {
exp(x)/(1+exp(x))
}
fit_seroprev <- fit_seroprev %>%
mutate(fit = expitfn(fit),
uprP = expitfn(uprP),
lwrP = expitfn(lwrP),
uprS = expitfn(uprS),
lwrS = expitfn(lwrS),
)pageprev <- ggplot(data=fit_seroprev,aes(x=age,y=fit,color=antigenf)) +
facet_grid(~antigenf) +
geom_ribbon(aes(ymin=lwrS,ymax=uprS),alpha=0.2,color=NA,fill="grey50") +
geom_line(lwd=0.5,alpha=0.5) +
geom_smooth(method="loess",se=FALSE,lwd=0.5) +
scale_color_manual(values=pcols)+
scale_y_continuous(breaks=seq(0,0.8,by=0.1),labels=seq(0,80,by=10))+
labs(x="age, years",y="Seroprevalence (%)") +
theme_minimal() +
theme(legend.position="none")
pageprevdlc <- dl %>%
group_by(community,antigenf) %>%
mutate(posroc=ifelse(logmfi>roccut,1,0)) %>%
summarize(n=n(),
meanmfi=mean(logmfi),
prevroc=mean(posroc))
# estimate pearson correlation
dcorr <- dlc %>%
ungroup() %>%
group_by(antigenf) %>%
mutate(corroc=cor(meanmfi,prevroc,method="pearson")) %>%
slice(1)
# estimate regression slope
foreach(ai=levels(dlc$antigenf)) %do% {
di <- dlc %>% filter(antigenf==ai)
fit <- lm(prevroc~meanmfi,data=di)
summary(fit)
}[[1]]
Call:
lm(formula = prevroc ~ meanmfi, data = di)
Residuals:
Min 1Q Median 3Q Max
-0.058248 -0.018387 -0.003961 0.020299 0.081329
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.38897 0.08000 -4.862 4.05e-05 ***
meanmfi 0.27861 0.04373 6.371 6.79e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.03368 on 28 degrees of freedom
Multiple R-squared: 0.5917, Adjusted R-squared: 0.5772
F-statistic: 40.58 on 1 and 28 DF, p-value: 6.791e-07
[[2]]
Call:
lm(formula = prevroc ~ meanmfi, data = di)
Residuals:
Min 1Q Median 3Q Max
-0.029840 -0.007884 -0.003631 0.010660 0.024363
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.836794 0.012860 -65.07 <2e-16 ***
meanmfi 0.418625 0.004177 100.22 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.01353 on 28 degrees of freedom
Multiple R-squared: 0.9972, Adjusted R-squared: 0.9971
F-statistic: 1.004e+04 on 1 and 28 DF, p-value: < 2.2e-16
[[3]]
Call:
lm(formula = prevroc ~ meanmfi, data = di)
Residuals:
Min 1Q Median 3Q Max
-0.047709 -0.014638 0.002135 0.013209 0.059840
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.34294 0.05604 -6.119 1.33e-06 ***
meanmfi 0.33966 0.01754 19.362 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.02506 on 28 degrees of freedom
Multiple R-squared: 0.9305, Adjusted R-squared: 0.928
F-statistic: 374.9 on 1 and 28 DF, p-value: < 2.2e-16
[[4]]
Call:
lm(formula = prevroc ~ meanmfi, data = di)
Residuals:
Min 1Q Median 3Q Max
-0.055515 -0.012084 -0.001454 0.010680 0.048189
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.38331 0.08568 -4.474 0.000117 ***
meanmfi 0.36491 0.03142 11.613 3.22e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.02448 on 28 degrees of freedom
Multiple R-squared: 0.8281, Adjusted R-squared: 0.8219
F-statistic: 134.9 on 1 and 28 DF, p-value: 3.217e-12
pmfivprev <- ggplot(data=dlc,aes(x=meanmfi,color=antigenf)) +
facet_grid(~antigenf) +
geom_point(aes(y=prevroc),alpha=0.7) +
geom_smooth(aes(y=prevroc),method="glm",color="gray40",lwd=0.5,se=FALSE) +
geom_text(data=dcorr,
aes(x=2.5,y=0.95,label=paste("r ==",sprintf("%1.2f",corroc)) ),
parse=TRUE,col="grey30") +
scale_y_continuous(breaks=seq(0,1,by=0.2),labels=seq(0,100,by=20))+
scale_color_manual(values=pcols)+
coord_cartesian(ylim=c(0,1),xlim=c(1,4.5)) +
labs(x="community mean log10 Luminex response (MFI-bg)",y="community seroprevalence (%)") +
theme_minimal() +
theme(legend.position="none")
pmfivprev
# save png file for presentation
ggsave(filename=here("output","mbita-mfi-v-seroprev.png"),plot=pmfivprev,device="png",width=8,height=2.5)Estimate community-level FOI for pathogens based on the seroconversion rate (incidence among susceptibles). Assume a constant rate model (i.e., average over all ages). If we assume a constant force of infection (\(\lambda\)), equivalent to assuming an exponential model, then it can be shown that a generalized linear model with a complementary log-log link fit with current status, age-prevalence data is equivalent to an exponential proportional hazards model (Jewell and van der Laan 1995).
\(\log - \log(1-P(Y|A,W)) = \log \lambda + \log A + \beta W\)
Moroever, this model is also equivalent to a catalytic, SIR model with a single, constant rate parameter (Hens et al. 2010; Hens et al. 2012).
# foi_ests <- foreach(ai=c("S. mansoni SEA","P. falciparum MSP-1"),.combine=rbind) %:%
foi_ests <- foreach(ai=levels(dl$antigenf),.combine=rbind) %:%
foreach(comi=levels(dl$community),.combine=rbind) %do% {
di <- dl %>% filter(antigenf==ai & community==comi)
gfit <- glm(seropos~1,offset=log(age),data=di,family=binomial(link="cloglog"))
gsum <- summary(gfit)
lambda <- as.numeric(exp(gfit$coefficients))
log_lambda_se <- sqrt(gsum$cov.unscaled)
lambda_lb <- as.numeric(exp(gfit$coefficients - 1.96*log_lambda_se))
lambda_ub <- as.numeric(exp(gfit$coefficients + 1.96*log_lambda_se))
res <- data.frame(community=comi,antigenf=ai,lambda,lambda_lb,lambda_ub)
return(res)
}
foi_ests community antigenf lambda lambda_lb lambda_ub
1 1 Ascaris spp. AsHb 0.041863782 0.025274054 0.06934290
2 2 Ascaris spp. AsHb 0.042339484 0.024545840 0.07303201
3 3 Ascaris spp. AsHb 0.029149807 0.018368933 0.04625806
4 4 Ascaris spp. AsHb 0.036752516 0.021728011 0.06216618
5 5 Ascaris spp. AsHb 0.022773389 0.011820450 0.04387542
6 6 Ascaris spp. AsHb 0.017998513 0.008994449 0.03601626
7 7 Ascaris spp. AsHb 0.022051622 0.012532810 0.03880008
8 8 Ascaris spp. AsHb 0.033765694 0.021516341 0.05298866
9 9 Ascaris spp. AsHb 0.039616757 0.024617738 0.06375433
10 10 Ascaris spp. AsHb 0.018560518 0.009289544 0.03708394
11 11 Ascaris spp. AsHb 0.036674649 0.023654735 0.05686091
12 12 Ascaris spp. AsHb 0.041096701 0.027067791 0.06239662
13 13 Ascaris spp. AsHb 0.058682356 0.040939748 0.08411432
14 14 Ascaris spp. AsHb 0.047651488 0.030653857 0.07407434
15 15 Ascaris spp. AsHb 0.014752727 0.006151003 0.03538333
16 16 Ascaris spp. AsHb 0.008838747 0.002216440 0.03524726
17 17 Ascaris spp. AsHb 0.020295125 0.008453613 0.04872379
18 18 Ascaris spp. AsHb 0.038267451 0.025642409 0.05710843
19 19 Ascaris spp. AsHb 0.047696957 0.029981772 0.07587943
20 20 Ascaris spp. AsHb 0.037242287 0.018628487 0.07445521
21 21 Ascaris spp. AsHb 0.019827571 0.007457970 0.05271308
22 22 Ascaris spp. AsHb 0.030912610 0.017539299 0.05448276
23 23 Ascaris spp. AsHb 0.081532026 0.048140679 0.13808429
24 24 Ascaris spp. AsHb 0.055838704 0.034722214 0.08979730
25 25 Ascaris spp. AsHb 0.032687260 0.020004998 0.05340950
26 26 Ascaris spp. AsHb 0.028479070 0.017461961 0.04644710
27 27 Ascaris spp. AsHb 0.074941167 0.046955065 0.11960751
28 28 Ascaris spp. AsHb 0.069144354 0.042286493 0.11306073
29 29 Ascaris spp. AsHb 0.042451297 0.025162029 0.07162032
30 30 Ascaris spp. AsHb 0.055777296 0.030801016 0.10100663
31 1 S. mansoni SEA 0.301685606 0.235967892 0.38570589
32 2 S. mansoni SEA 0.280982899 0.214648648 0.36781685
33 3 S. mansoni SEA 0.626139365 0.518527498 0.75608431
34 4 S. mansoni SEA 0.419230259 0.331890906 0.52955356
35 5 S. mansoni SEA 0.434664442 0.344429588 0.54853933
36 6 S. mansoni SEA 0.303684968 0.242314034 0.38059933
37 7 S. mansoni SEA 0.367399794 0.301418740 0.44782421
38 8 S. mansoni SEA 0.559105709 0.463354347 0.67464392
39 9 S. mansoni SEA 0.411512188 0.332893387 0.50869824
40 10 S. mansoni SEA 0.417250184 0.334439865 0.52056508
41 11 S. mansoni SEA 0.558480853 0.460506003 0.67730032
42 12 S. mansoni SEA 0.212817923 0.170920060 0.26498626
43 13 S. mansoni SEA 0.196508610 0.156703863 0.24642426
44 14 S. mansoni SEA 0.264851368 0.209457494 0.33489490
45 15 S. mansoni SEA 0.103910754 0.072629862 0.14866399
46 16 S. mansoni SEA 0.150251972 0.101818298 0.22172493
47 17 S. mansoni SEA 0.042409429 0.022845562 0.07872687
48 18 S. mansoni SEA 0.153709691 0.122821980 0.19236515
49 19 S. mansoni SEA 0.053934849 0.034757328 0.08369366
50 20 S. mansoni SEA 0.032242683 0.015374053 0.06761981
51 21 S. mansoni SEA 0.110300440 0.070220980 0.17325573
52 22 S. mansoni SEA 0.096913615 0.068777839 0.13655923
53 23 S. mansoni SEA 0.081113951 0.047839210 0.13753306
54 24 S. mansoni SEA 0.028012170 0.014581699 0.05381278
55 25 S. mansoni SEA 0.015847599 0.007920106 0.03170998
56 26 S. mansoni SEA 0.196810250 0.157649801 0.24569821
57 27 S. mansoni SEA 0.103958145 0.069101500 0.15639741
58 28 S. mansoni SEA 0.032379144 0.016190025 0.06475648
59 29 S. mansoni SEA 0.042517154 0.025210485 0.07170463
60 30 S. mansoni SEA 0.045251259 0.023560651 0.08691086
61 1 P. falciparum MSP-1 0.407637151 0.320078189 0.51914830
62 2 P. falciparum MSP-1 0.427737126 0.329406474 0.55542032
63 3 P. falciparum MSP-1 0.490430794 0.408358958 0.58899740
64 4 P. falciparum MSP-1 0.497440179 0.393612041 0.62865641
65 5 P. falciparum MSP-1 0.368167187 0.291158296 0.46554427
66 6 P. falciparum MSP-1 0.457272614 0.367294294 0.56929347
67 7 P. falciparum MSP-1 0.362770359 0.297544349 0.44229485
68 8 P. falciparum MSP-1 0.262863661 0.214847856 0.32161040
69 9 P. falciparum MSP-1 0.219009632 0.172317206 0.27835420
70 10 P. falciparum MSP-1 0.321838684 0.256492308 0.40383331
71 11 P. falciparum MSP-1 0.501949931 0.414488378 0.60786682
72 12 P. falciparum MSP-1 0.462092754 0.379376546 0.56284374
73 13 P. falciparum MSP-1 0.588934359 0.481939700 0.71968273
74 14 P. falciparum MSP-1 0.574104330 0.458162656 0.71938596
75 15 P. falciparum MSP-1 0.518413852 0.406380970 0.66133245
76 16 P. falciparum MSP-1 0.286884906 0.206676892 0.39822037
77 17 P. falciparum MSP-1 0.310148268 0.229968269 0.41828357
78 18 P. falciparum MSP-1 0.562744126 0.468443729 0.67602773
79 19 P. falciparum MSP-1 0.508307213 0.401108999 0.64415464
80 20 P. falciparum MSP-1 0.342897988 0.248084342 0.47394781
81 21 P. falciparum MSP-1 0.373106782 0.269934993 0.51571184
82 22 P. falciparum MSP-1 0.503276492 0.395398068 0.64058792
83 23 P. falciparum MSP-1 0.297799158 0.211163176 0.41998013
84 24 P. falciparum MSP-1 0.562119613 0.434555864 0.72712966
85 25 P. falciparum MSP-1 0.546408800 0.443612365 0.67302582
86 26 P. falciparum MSP-1 0.483602893 0.397036708 0.58904316
87 27 P. falciparum MSP-1 0.349169873 0.261968348 0.46539821
88 28 P. falciparum MSP-1 0.593993450 0.441420952 0.79930102
89 29 P. falciparum MSP-1 0.652295400 0.507769041 0.83795831
90 30 P. falciparum MSP-1 0.449384128 0.328414573 0.61491210
91 1 Giardia sp. VSP-5 0.303039279 0.235896095 0.38929345
92 2 Giardia sp. VSP-5 0.221509436 0.165513907 0.29644899
93 3 Giardia sp. VSP-5 0.315019539 0.260903835 0.38035972
94 4 Giardia sp. VSP-5 0.367568707 0.290423541 0.46520593
95 5 Giardia sp. VSP-5 0.365506132 0.289008626 0.46225171
96 6 Giardia sp. VSP-5 0.271173583 0.214900364 0.34218235
97 7 Giardia sp. VSP-5 0.314953175 0.257351293 0.38544785
98 8 Giardia sp. VSP-5 0.360752935 0.298256395 0.43634498
99 9 Giardia sp. VSP-5 0.372873268 0.300987583 0.46192761
100 10 Giardia sp. VSP-5 0.263988144 0.208595594 0.33409018
101 11 Giardia sp. VSP-5 0.232172528 0.187891999 0.28688865
102 12 Giardia sp. VSP-5 0.222601106 0.178955009 0.27689224
103 13 Giardia sp. VSP-5 0.289231163 0.235426303 0.35533271
104 14 Giardia sp. VSP-5 0.297846696 0.236794953 0.37463913
105 15 Giardia sp. VSP-5 0.263812414 0.202842651 0.34310826
106 16 Giardia sp. VSP-5 0.257431919 0.184190485 0.35979705
107 17 Giardia sp. VSP-5 0.243361928 0.177584234 0.33350386
108 18 Giardia sp. VSP-5 0.269187861 0.222283620 0.32598940
109 19 Giardia sp. VSP-5 0.271835038 0.212303320 0.34805997
110 20 Giardia sp. VSP-5 0.199340177 0.139334279 0.28518830
111 21 Giardia sp. VSP-5 0.319205568 0.229272920 0.44441443
112 22 Giardia sp. VSP-5 0.290699506 0.226858556 0.37250613
113 23 Giardia sp. VSP-5 0.258931634 0.181754776 0.36887939
114 24 Giardia sp. VSP-5 0.262222117 0.200019141 0.34376929
115 25 Giardia sp. VSP-5 0.223646968 0.177972964 0.28104250
116 26 Giardia sp. VSP-5 0.265771586 0.216026888 0.32697104
117 27 Giardia sp. VSP-5 0.359869156 0.270273538 0.47916570
118 28 Giardia sp. VSP-5 0.357305232 0.266570015 0.47892494
119 29 Giardia sp. VSP-5 0.306640041 0.237398847 0.39607654
120 30 Giardia sp. VSP-5 0.328328900 0.237820724 0.45328205
# merge FOI estimates to the cluster-level means
d_foi <- dlc %>%
# filter(antigenf=="S. mansoni SEA"|antigenf=="P. falciparum MSP-1") %>%
left_join(foi_ests,by=c("community","antigenf"))# %>%
# mutate(antigenf=factor(antigenf,levels=c("S. mansoni SEA","P. falciparum MSP-1")))
# estimate spearman correlation
dfoicorr <- d_foi %>%
ungroup() %>%
group_by(antigenf) %>%
mutate(cormfi=cor(meanmfi,lambda,method="spearman"),
corprev=cor(prevroc,lambda,method="spearman")) %>%
slice(1)Figure of results
pfoivmfi <- ggplot(data=d_foi,aes(x=meanmfi,y=lambda,color=antigenf)) +
facet_grid(~antigenf)+
geom_point(,alpha=0.7) +
# geom_smooth(method="glm",color="gray40",lwd=0.5,se=FALSE) +
geom_smooth(method="loess",color="gray40",lwd=0.5,se=FALSE) +
geom_text(data=dfoicorr,
aes(x=2.5,y=0.6,label=paste("rho ==",sprintf("%1.2f",cormfi)) ),
parse=TRUE,col="grey30") +
scale_x_continuous(breaks=2:4)+
scale_color_manual(values=pcols)+
coord_cartesian(xlim=c(1.5,4),ylim=c(0,0.7)) +
labs(x="community mean log10 Luminex response (MFI-bg)",y=expression(paste("community seroconversion rate (",lambda,")"))) +
theme_minimal() +
theme(legend.position="none")
pfoivmfi
# save png file for presentation
ggsave(filename=here("output","mbita-mfi-v-foi.png"),plot=pfoivmfi,device="png",width=8,height=2.5)pfoivprev <- ggplot(data=d_foi,aes(x=prevroc,y=lambda,color=antigenf)) +
facet_grid(~antigenf)+
geom_point(alpha=0.7) +
# geom_smooth(method="glm",color="gray40",lwd=0.5,se=FALSE) +
geom_smooth(method="loess",color="gray40",lwd=0.5,se=FALSE) +
geom_text(data=dfoicorr,
aes(x=0.2,y=0.6,label=paste("rho ==",sprintf("%1.2f",corprev)) ),
parse=TRUE,col="grey30") +
scale_x_continuous(breaks=seq(0,1,by=0.2),labels=seq(0,100,by=20))+
scale_color_manual(values=pcols)+
coord_cartesian(xlim=c(0,1),ylim=c(0,0.7)) +
labs(x="community seroprevalence (%)",y=expression(paste("community seroconversion rate (",lambda,")"))) +
theme_minimal() +
theme(legend.position="none")
pfoivprev
# save png file for presentation
ggsave(filename=here("output","mbita-prev-v-foi.png"),plot=pfoivprev,device="png",width=8,height=3)dlckk <- dl %>%
group_by(community,antigenf) %>%
filter(antigen %in% c("ashb","sea")) %>%
mutate(posroc=ifelse(logmfi>roccut,1,0),
logkk=ifelse(epg>0,log10(epg),log10(1)),
poskk=ifelse(epg>0,1,0)) %>%
summarize(n=n(),
meanmfi=mean(logmfi),
prevroc=mean(posroc),
meankk=mean(logkk,na.rm=T),
prevkk=mean(poskk,na.rm=T))
# estimate pearson correlation
dcorrkk <- dlckk %>%
ungroup() %>%
group_by(antigenf) %>%
mutate(corspkk=cor(prevroc,prevkk,method="spearman"),
cormukk=cor(meanmfi,prevkk,method="spearman"),
cormukkmu=cor(meanmfi,meankk,method="spearman")) %>%
slice(1)Figure of results
# KK prevalence versus MFI
pmfivkk <- ggplot(data=dlckk,aes(x=meanmfi,color=antigenf)) +
facet_grid(~antigenf) +
geom_point(aes(y=prevkk),alpha=0.7) +
geom_smooth(aes(y=prevkk),method="loess",color="gray40",lwd=0.5,se=FALSE) +
geom_text(data=dcorrkk,
aes(x=1,y=0.55,label=paste("rho ==",sprintf("%1.2f",cormukk)) ),
parse=TRUE,col="grey30",hjust=0) +
scale_y_continuous(breaks=seq(0,0.6,by=0.2),labels=seq(0,60,by=20))+
scale_color_manual(values=pcols)+
coord_cartesian(ylim=c(0,0.6),xlim=c(1,4.5)) +
labs(x="community mean log10 Luminex response",y="community prevalence by Kato-Katz (%)") +
theme_minimal() +
theme(legend.position="none")
pmfivkk
# save png file for presentation
ggsave(filename=here("output","mbita-mfi-v-kkprev.png"),plot=pmfivkk,device="png",width=5,height=3)# KK mean versus MFI
pmfivkkmean <- ggplot(data=dlckk,aes(x=meanmfi,color=antigenf)) +
facet_grid(~antigenf) +
geom_point(aes(y=meankk),alpha=0.7) +
geom_smooth(aes(y=meankk),method="loess",color="gray40",lwd=0.5,se=FALSE) +
geom_text(data=dcorrkk,
aes(x=1,y=1,label=paste("rho ==",sprintf("%1.2f",cormukkmu)) ),
parse=TRUE,col="grey30",hjust=0) +
scale_y_continuous(breaks=seq(0,1,by=0.2))+
scale_color_manual(values=pcols)+
coord_cartesian(ylim=c(0,1.1),xlim=c(1,4.5)) +
labs(x="community mean log10 Luminex response",y="community mean log10 eggs per gram") +
theme_minimal() +
theme(legend.position="none")
pmfivkkmean
# save png file for presentation
ggsave(filename=here("output","mbita-mfi-v-kkmean.png"),plot=pmfivkkmean,device="png",width=5,height=3)# KK prevalence versus seroprevalence
pspvkk <- ggplot(data=dlckk,aes(x=prevroc,color=antigenf)) +
facet_grid(~antigenf) +
geom_point(aes(y=prevkk),alpha=0.7) +
geom_smooth(aes(y=prevkk),method="loess",color="gray40",lwd=0.5,se=FALSE) +
geom_abline(intercept=0,slope=1,lty="dashed",color="gray40",lwd=0.25)+
geom_text(data=dcorrkk,
aes(x=0.1,y=0.8,label=paste("rho ==",sprintf("%1.2f",corspkk)) ),
parse=TRUE,col="grey30",hjust=0) +
scale_y_continuous(breaks=seq(0,0.8,by=0.2),labels=seq(0,80,by=20))+
scale_x_continuous(breaks=seq(0,0.8,by=0.2),labels=seq(0,80,by=20))+
scale_color_manual(values=pcols)+
coord_cartesian(ylim=c(0,0.9),xlim=c(0,0.9)) +
labs(x="community seroprevalence (%)",y="community prevalence by Kato-Katz (%)") +
theme_minimal() +
theme(legend.position="none")
pspvkk
# save png file for presentation
ggsave(filename=here("output","mbita-serop-v-kkprev.png"),plot=pspvkk,device="png",width=5,height=3)Downsample the observations within each community to see how that influences the relationships
Note: this analysis probably won’t contribute to the presentation because not sufficient time to think about it and interpret it.
# estimate means with sample sizes of betweeen 10 and 50 observations
set.seed(123)
ssests <- foreach(ssi=seq(100,10,by=-10),.combine=rbind) %:%
foreach(iteri=1:1000,.combine=rbind) %dopar% {
di <- dl %>%
group_by(community,antigenf) %>%
sample_n(size=ssi,replace=TRUE) %>%
summarize(meanmfi=mean(logmfi),
seroprev=mean(seropos)) %>%
mutate(ss=ssi,iter=iteri)
}
# merge in the means with the full sample
dlc2 <- dlc %>%
select(community,antigenf,n,mean=meanmfi,serop=prevroc) %>%
left_join(select(d_foi,community,antigenf,lambda),by=c("community","antigenf")) %>%
mutate(antigenf=factor(antigenf,levels=levels(dl$antigenf)))
ssests2 <- left_join(ssests,dlc2,by=c("community","antigenf"))
# average over bootstrap estimates
ssmeans <- ssests2 %>%
group_by(community,antigenf,ss) %>%
summarize(mean=mean(mean),
mu=mean(meanmfi),
mu_sd=sd(meanmfi),
mu_lb=quantile(meanmfi,probs=c(0.025)),
mu_ub=quantile(meanmfi,probs=c(0.975)),
mu_mse=mean((meanmfi-mean)^2),
mu_bias=mean(meanmfi-mean),
serop=mean(serop),
prev=mean(seroprev),
prev_sd=sd(seroprev),
prev_lb=quantile(seroprev,probs=c(0.025)),
prev_ub=quantile(seroprev,probs=c(0.975)),
prev_mse=mean((seroprev-serop)^2),
prev_bias=mean(seroprev-serop)
)
# correlation betwen community-level means and seroprev, over bootstrap estimates
sscorr <- ssests2 %>%
group_by(antigenf,ss,iter) %>%
summarize(cor_mean=cor(meanmfi,serop,method="pearson"),
cor_prev=cor(seroprev,serop,method="pearson")) %>%
ungroup() %>%
group_by(antigenf,ss) %>%
summarize(cor_mu=mean(cor_mean),
cor_mu_lb=quantile(cor_mean,probs=c(0.025)),
cor_mu_ub=quantile(cor_mean,probs=c(0.975)),
cor_pr=mean(cor_prev),
cor_pr_lb=quantile(cor_prev,probs=c(0.025)),
cor_pr_ub=quantile(cor_prev,probs=c(0.975)),
)
# correlation betwen community-level means and seroprev, over bootstrap estimates
sscorr_foi <- ssests2 %>%
group_by(antigenf,ss,iter) %>%
filter(!is.na(lambda)) %>%
summarize(cor_mean=cor(meanmfi,lambda,method="spearman"),
cor_prev=cor(seroprev,lambda,method="spearman")) %>%
ungroup() %>%
group_by(antigenf,ss) %>%
summarize(cor_mu=mean(cor_mean),
cor_mu_lb=quantile(cor_mean,probs=c(0.025)),
cor_mu_ub=quantile(cor_mean,probs=c(0.975)),
cor_pr=mean(cor_prev),
cor_pr_lb=quantile(cor_prev,probs=c(0.025)),
cor_pr_ub=quantile(cor_prev,probs=c(0.975)),
)Plot a figure of RMSE in community-level estimates by sample size.
ssmumsep <- ggplot(data=ssmeans,aes(x=ss,y=sqrt(mu_mse),group=community)) +
facet_grid(.~antigenf)+
geom_line(alpha=0.2) +
theme_minimal()
ssmumsepssprevmsep <- ggplot(data=ssmeans,aes(x=ss,y=sqrt(prev_mse),group=community)) +
facet_grid(.~antigenf)+
geom_line(alpha=0.2) +
theme_minimal()
ssprevmsepPlot a figure of the correlation between the seroprevalence estimated in the full sample versus cluster level mean (blue) or cluster level seroprevalence (orange) estimated at each smaller sample size
sscorp <- ggplot(data=sscorr,aes(x=ss)) +
facet_grid(.~antigenf)+
geom_ribbon(aes(ymin=cor_mu_lb,ymax=cor_mu_ub),color=NA,fill=cblue,alpha=0.2)+
geom_line(aes(y=cor_mu),color=cblue) +
geom_ribbon(aes(ymin=cor_pr_lb,ymax=cor_pr_ub),color=NA,fill=corange,alpha=0.2)+
geom_line(aes(y=cor_pr),color=corange)+
coord_cartesian(ylim=c(0,1))+
labs(y="Correlation with community level seroprevalence",x="Sample size per community") +
theme_minimal()
sscorpPlot a figure of the correlation between the seroconversion rate estimated in the full sample versus cluster level mean (blue) or cluster level seroprevalence (orange) estimated at each smaller sample size
sscorfoip <- ggplot(data=sscorr_foi,aes(x=ss)) +
facet_grid(.~antigenf)+
geom_ribbon(aes(ymin=cor_mu_lb,ymax=cor_mu_ub),color=NA,fill=cblue,alpha=0.2)+
geom_line(aes(y=cor_mu),color=cblue) +
geom_ribbon(aes(ymin=cor_pr_lb,ymax=cor_pr_ub),color=NA,fill=corange,alpha=0.2)+
geom_line(aes(y=cor_pr),color=corange)+
coord_cartesian(ylim=c(0,1))+
labs(y="Correlation with community level seroconversion rate",x="Sample size per community") +
theme_minimal()
sscorfoipBased on these figures, there is no loss of information from reducing quantitative measures to seroprevalence, and if anything seroprevalence performs slightly better with respect to correlation measures.